--- layout: post title: Plot circos use R categories: [cate1, cate2] description: some word here keywords: keyword1, keyword2 mermaid: false sequence: false flow: false mathjax: false mindmap: false mindmap2: false ---
rtracklayerå
ę²”ęå°±ēØBiocManager::install("rtracklayer")å®č£
ļ¼å
ēäøē§å®č£
ę¹ę³ļ¼ååøå¹³å°ļ¼ļ¼CRANćgithubćBioconductor.
对åŗäøē§å®č£
ę¹ę³
install.packages()
remotes::install_github()
BiocManager::install()
é¦å č°·ęęē“¢ęŖå®č£ ēå ęÆååøåØåŖäøŖå¹³å°ļ¼äøč¬ęÆCRANåBioconductorļ¼github大å¤ęÆäøäŗäøŖäŗŗå¼åēå ęęę°ēļ¼äøēسå®ēļ¼ä¼å ååøå°github.
čÆ»å „é¢ęµē¼ē č½åēęä»¶ļ¼č·å¾éē¼ē RNAēID
abun2 <- read_delim("E:/gder/Tes_Epi.fl.count.tsv",col_names = T,delim = "\t")
cpc2 <- read_delim("E:/gder/coding_predict_data/CPC2.txt.txt",delim = "\t",col_names = T,col_select = c(pbid=1,type=8)) %>%
inner_join(.,abun2,by="pbid") %>%
filter(type=="noncoding")
cnci <- read_delim("E:/gder/coding_predict_data/CNCI.index",delim = "\t",col_names = T,col_select = c(pbid=1,type=2)) %>%
inner_join(.,abun2,by="pbid") %>%
filter(type=="noncoding")
cpat <- read_delim("E:/gder/coding_predict_data/cpat.ORF_prob.best.tsv",delim = "\t",col_names = T,col_select = c(pbid=1,prob=11)) %>%
dplyr::mutate(type=ifelse(prob>0.71,"coding","noncoding")) %>% dplyr::select(1,3) %>%
inner_join(.,abun2,by="pbid") %>%
filter(type=="noncoding")
pfam <- read_table("E:/gder/coding_predict_data/pfam.all.txt",skip = 29,col_names = F) %>%
dplyr::mutate(type=ifelse(X13>0.001,"noncoding","coding")) %>%
filter(type=="coding") %>%
dplyr::select(pbid="X1",type) %>%
distinct() %>%
full_join(.,abun2,by="pbid") %>%
filter(is.na(type))
#ęåéē¼ē ēid
gplots::venn(list(CNCI=cnci[cnci$FG.count>0,]$pbid,CPC2=cpc2[cpc2$FG.count>0,]$pbid,Pfam=pfam[pfam$FG.count>0,]$pbid,CPAT=cpat[cpat$FG.count>0,]$pbid),show.plot = F) -> noncod.venn.fg
gplots::venn(list(CNCI=cnci[cnci$GW.count>0,]$pbid,CPC2=cpc2[cpc2$GW.count>0,]$pbid,Pfam=pfam[pfam$GW.count>0,]$pbid,CPAT=cpat[cpat$GW.count>0,]$pbid),show.plot = F) -> noncod.venn.gw
attributes(noncod.venn.fg)$intersections$`CNCI:CPC2:Pfam:CPAT` -> noncod.fg
attributes(noncod.venn.gw)$intersections$`CNCI:CPC2:Pfam:CPAT` -> noncod.gwčÆ»å „ē¬¬äøåę°ę® I; cytoband.dfäøŗęč²ä½éæåŗ¦äæ”ęÆ
cytoband.df <- read.table("E:/gder/Fig6.circos/cytoBandIdeo.txt", colClasses = c("character", "numeric",
"numeric", "character", "character"), sep = "\t")
head(cytoband.df)## V1 V2 V3 V4 V5
## 1 chr1 0 274330532 gneg
## 2 chr10 0 69359453 gneg
## 3 chr11 0 79169978 gneg
## 4 chr12 0 61602749 gneg
## 5 chr13 0 208334590 gneg
## 6 chr14 0 141755446 gneg
čÆ»å „äøä»£č·å¾ē注éäæ”ęÆļ¼åå¤ II å IV åę°ę®ļ¼GWåFGé“å®å°ēisoformļ¼
使ēØrtracklayerå
äøēimport()å½ę°åƼå
„gtfęä»¶
AEMKåMTēęč²ä½å é¤ļ¼åŖäæēäø»č¦ēęč²ä½
sqanti3_class <- read_delim("E:/gder/SQANTI3.mergefinal_classification.txt",col_names = T,delim = "\t")
isoseqgtf <- rtracklayer::import("E:/gder/Fig6.circos/SQANTI3.mergefinal_corrected.gtf")
isoseqgtf <- as.data.frame(isoseqgtf)
isoseqgtf <- isoseqgtf[-grep("AEMK",isoseqgtf$seqnames),]#å»é¤AEMKęč²ä½
isoseqgtf <- isoseqgtf[-grep("MT",isoseqgtf$seqnames),]#å»é¤MTęč²ä½
isoseqgtf$seqnames <- paste("chr",isoseqgtf$seqnames,sep = "")
isoseqgtf %>%
filter(type=="transcript") %>%
inner_join(.,abun2,by=c("transcript_id"="pbid")) %>%
dplyr::select(seqnames,start,end,transcript_id,GW.count) %>%
filter(GW.count>0) -> gw.isoform.bed
isoseqgtf %>%
filter(type=="transcript") %>%
inner_join(.,abun2,by=c("transcript_id"="pbid")) %>%
dplyr::select(seqnames,start,end,transcript_id,FG.count) %>%
filter(FG.count>0) -> fg.isoform.bed
head(fg.isoform.bed)## seqnames start end transcript_id FG.count
## 1 chr1 23811 40028 PB.1.1 14
## 2 chr1 121145 186749 PB.2.1 5
## 3 chr1 121146 186752 PB.2.2 14
## 4 chr1 121404 186759 PB.2.3 4
## 5 chr1 169865 186757 PB.2.4 4
## 6 chr1 575725 578418 PB.3.1 4
åå¤ III å V åę°ę®ļ¼GWåFGé¢ęµå°ēéē¼ē RNA
第äøę„č·å¾ēéē¼ē RNAēidęÆå符串ļ¼čæéč¦ēØtibble()ęas.data.frame()转ę¢äøŗę°ę®ę”ęč½åå¹¶
isoseqgtf %>%
filter(type=="transcript") %>%
inner_join(.,tibble(pbid=noncod.gw),by=c("transcript_id"="pbid")) %>%
dplyr::select(seqnames,start,end,transcript_id) %>%
inner_join(.,sqanti3_class[,c(1,4)],by=c("transcript_id"="isoform")) -> gw.non.bed
isoseqgtf %>%
filter(type=="transcript") %>%
inner_join(.,tibble(pbid=noncod.fg),by=c("transcript_id"="pbid")) %>%
dplyr::select(seqnames,start,end,transcript_id) %>%
inner_join(.,sqanti3_class[,c(1,4)],by=c("transcript_id"="isoform")) -> fg.non.bed
head(fg.non.bed)## seqnames start end transcript_id length
## 1 chr1 6730403 6733062 PB.10.1 2660
## 2 chr1 57651685 57654846 PB.77.1 1075
## 3 chr1 62590470 62590714 PB.78.1 245
## 4 chr1 73119761 73124353 PB.90.2 1835
## 5 chr1 73121192 73124353 PB.90.3 404
## 6 chr1 74757545 74760323 PB.94.1 2779
Ensembl ę°ę®ļ¼åč注éęä»¶ļ¼ē¬¬ VI å
å č½½č¾ę
¢ļ¼éč¦ēå¾
ļ¼ę°ę®č¾å¤§ļ¼å»ŗč®®head()ę„ē
ensegtf <- rtracklayer::import("E:/gder/Fig6.circos/Sus_scrofa.Sscrofa11.1.107.gtf.gz")
ensegtf <- as.data.frame(ensegtf)
ensegtf <- ensegtf[-grep("AEMK",ensegtf$seqnames),]
ensegtf <- ensegtf[-grep("MT",ensegtf$seqnames),]
ensegtf$seqnames <- paste0("chr",ensegtf$seqnames)
ensegtf.trans <- ensegtf[ensegtf$type=="transcript",]
ensegtf.gene <- ensegtf[ensegtf$type=="gene",]
head(ensegtf.gene)## seqnames start end width strand source type score phase
## 1 chr1 226161299 226217308 56010 - ensembl gene NA NA
## 187 chr1 226414306 226432279 17974 + ensembl gene NA NA
## 248 chr1 227574648 227772671 198024 + ensembl gene NA NA
## 351 chr1 227811204 227963309 152106 - ensembl gene NA NA
## 545 chr1 228033996 228045013 11018 - ensembl gene NA NA
## 557 chr1 228054915 228092263 37349 - ensembl gene NA NA
## gene_id gene_version gene_name gene_source gene_biotype
## 1 ENSSSCG00000028996 4 ALDH1A1 ensembl protein_coding
## 187 ENSSSCG00000005267 6 ANXA1 ensembl protein_coding
## 248 ENSSSCG00000005268 5 RORB ensembl protein_coding
## 351 ENSSSCG00000005269 5 TRPM6 ensembl protein_coding
## 545 ENSSSCG00000031382 2 C9orf40 ensembl protein_coding
## 557 ENSSSCG00000005271 5 CARNMT1 ensembl protein_coding
## transcript_id transcript_version transcript_name transcript_source
## 1 <NA> <NA> <NA> <NA>
## 187 <NA> <NA> <NA> <NA>
## 248 <NA> <NA> <NA> <NA>
## 351 <NA> <NA> <NA> <NA>
## 545 <NA> <NA> <NA> <NA>
## 557 <NA> <NA> <NA> <NA>
## transcript_biotype exon_number exon_id exon_version protein_id
## 1 <NA> <NA> <NA> <NA> <NA>
## 187 <NA> <NA> <NA> <NA> <NA>
## 248 <NA> <NA> <NA> <NA> <NA>
## 351 <NA> <NA> <NA> <NA> <NA>
## 545 <NA> <NA> <NA> <NA> <NA>
## 557 <NA> <NA> <NA> <NA> <NA>
## protein_version projection_parent_transcript tag
## 1 <NA> <NA> <NA>
## 187 <NA> <NA> <NA>
## 248 <NA> <NA> <NA>
## 351 <NA> <NA> <NA>
## 545 <NA> <NA> <NA>
## 557 <NA> <NA> <NA>
第 VII åļ¼čååŗå
isoseqgtf %>%
filter(type=="transcript") %>%
inner_join(.,abun2,by=c("transcript_id"="pbid")) %>%
dplyr::select(seqnames,start,end,transcript_id,GW.count,FG.count) %>%
inner_join(.,sqanti3_class[,c(1,6)],by=c("transcript_id"="isoform")) %>%
filter(structural_category=="fusion") %>%
dplyr::select(1:3,5:6) %>%
pivot_longer(cols = 4:5) %>%
filter(value>0) %>%
mutate(va=ifelse(name=="GW.count",1,1.01)) %>%
dplyr::select(1:3,6) -> fusion.bed
head(fusion.bed)## # A tibble: 6 Ć 4
## seqnames start end va
## <chr> <int> <int> <dbl>
## 1 chr1 28968843 29052422 1
## 2 chr1 28968843 29052422 1.01
## 3 chr1 136704410 137163550 1
## 4 chr1 136704412 137163550 1
## 5 chr1 136916922 137163561 1
## 6 chr1 137998583 138191950 1
āčæåŖęÆäøäøŖęØ”ęæ
å®ę¹ę攣åč https://jokergoo.github.io/circlize_book/book
äøę„äøę„čæč”
åŗē°Note: 1 point is out of plotting region in sector āchr5ā, track
ā1ā.å¹¶äøęÆę„éļ¼åŖęÆęę ē¾å å°äŗåå¤ļ¼ę+ mm_y(3.8)å é¤å°±äøä¼åŗē°ę¤note
circos.clear()#ęø
é¤ē»ęæ
circos.par(start.degree = 83, gap.after = c(rep(1, 19), 15),cell.padding = c(0, 0, 0, 0))#建ē«ē»ęæ
circos.initializeWithIdeogram(cytoband.df,plotType = NULL)#åå§å
circos.genomicTrackPlotRegion(
cytoband.df, track.height = 0.04, stack = TRUE, bg.border="black",
panel.fun = function(region, value, ...) {
chr=CELL_META$sector.index
xlim=CELL_META$xlim
ylim=CELL_META$ylim
circos.text(CELL_META$xcenter, CELL_META$ylim[2] + mm_y(3.8), chr, cex = 1, col = "black",
niceFacing = TRUE)
} ,bg.col = "#ccccd6")ę·»å ęč²ä½å°ŗåŗ¦å¹¶ę·»å Ię ē¾
#ęč²ä½å°ŗåŗ¦
brk <- c(0,50,100,150,200,250,300)*1000000
#brk_label<-paste0(c(0,50,100,150,200,250,300),"M")
brk_label<-c(0,50,100,150,200,250,300)
circos.track(track.index = get.current.track.index(),
panel.fun = function(x, y) {
circos.axis(h="top",
major.at=brk,
labels=brk_label,
labels.cex=0.55,
col="black",
labels.col="black",
major.tick.length = mm_y(0.5),
labels.facing="clockwise",)
},
bg.border=F)
circos.text(x = CELL_META$xlim[2] + mm_x(5),y = CELL_META$ycenter,labels = "I",
sector.index = "chrY",track.index = 1,cex = 1.5,col = "black")ę·»å 第äŗå
## II
circos.genomicDensity(gw.isoform.bed[,1:3],col = c("#F08650"), track.height = 0.1,
bg.border = NA,bg.col = NA ,type = "h",overlap = F,count_by = "number",
window.size = 1e5)
circos.text(x = CELL_META$xlim[2] + mm_x(4.5),y = CELL_META$ycenter,labels = "II",
sector.index = "chrY",track.index = 2,cex = 1.5,col = "black")ę·»å 第äøå
# III; GW noncod point
max(fg.non.bed$length,gw.non.bed$length) -> MAX
circos.trackPlotRegion(ylim = c(0, 7000),track.height = 0.08,bg.col = "#f0e9e6", bg.border = NA,
panel.fun = function(x,y) {
for (i in seq(0,7000,1000)) {
circos.lines(c(0,CELL_META$xlim[2]),c(i,i),col = "#939393",lwd = 0.1)
}
})
for (chromosome in unique(gw.non.bed$seqnames)){
circos.points(sector.index = chromosome,
x = gw.non.bed[gw.non.bed$seqnames==chromosome,]$start,
y = gw.non.bed[gw.non.bed$seqnames==chromosome,]$length,
pch = 16,
col = "#F08650",
cex = 0.3,
bg=NA)
}
circos.text(x = CELL_META$xlim[2] + mm_x(4),y = CELL_META$ycenter,labels = "III",
sector.index = "chrY",track.index = 3,cex = 1.5,col = "black")ę·»å 第åå
## IV; FG isoform density
circos.genomicDensity(fg.isoform.bed[,1:3],col = c("#9adafa"), track.height = 0.1,
bg.border = NA,bg.col = NA ,type = "h",overlap = F,count_by = "number",
window.size = 1e5)
circos.text(x = CELL_META$xlim[2] + mm_x(3.5),y = CELL_META$ycenter ,labels = "IV",
sector.index = "chrY",track.index = 4,cex = 1.5,col = "black")ę·»å 第äŗå
# V; FG noncod point
circos.trackPlotRegion(ylim = c(0, 7000),track.height = 0.08,bg.col = "#e9f3fa", bg.border = NA,
panel.fun = function(x,y) {
for (i in seq(0,7000,1000)) {
circos.lines(c(0,CELL_META$xlim[2]),c(i,i),col = "#939393",lwd = 0.1)
}
})
for (chromosome in unique(fg.non.bed$seqnames)){
circos.points(sector.index = chromosome,
x = fg.non.bed[fg.non.bed$seqnames==chromosome,]$start,
y = fg.non.bed[fg.non.bed$seqnames==chromosome,]$length,
pch = 16,
col = "#9adafa",
cex = 0.3,
bg=NA)
}
circos.text(x = CELL_META$xlim[2] + mm_x(3),y = CELL_META$ycenter,labels = "V",
sector.index = "chrY",track.index = 5,cex = 1.5,col = "black")ę·»å 第å å
## VI; Ensembl isoform density
circos.genomicDensity(ensegtf.trans[,1:3],col = c("#8ed3c9"), track.height = 0.1,
bg.border = NA,bg.col = NA ,type = "h",overlap = F,count_by = "number",
window.size = 1e5)
circos.text(x = CELL_META$xlim[2] + mm_x(2.5),y = CELL_META$ycenter,labels = "VI",
sector.index = "chrY",track.index = 6,cex = 1.5,col = "black")ę·»å 第äøå
# VII; fusion
circos.trackPlotRegion(ylim = c(0, 1.01),track.height = 0.08,bg.col = NA, bg.border = NA,
panel.fun = function(x,y) {
circos.lines(c(0,CELL_META$xlim[2]),c(0,0),col = "#D1D1D1",lwd = 0.1)
})
for (chromosome in unique(fusion.bed$seqnames)){
circos.barplot(sector.index = chromosome,
value = fusion.bed[fusion.bed$seqnames==chromosome,]$va,
pos = fusion.bed[fusion.bed$seqnames==chromosome,]$start,
col = ifelse(fusion.bed[fusion.bed$seqnames==chromosome,]$va==1,"#9adafa","#F08650"),
bar_width = 3500000,
border=NA)
}
circos.text(x = CELL_META$xlim[2] + mm_x(2),y = CELL_META$ycenter,labels = "VII",
sector.index = "chrY",track.index = 7,cex = 1.5,col = "black")
grid.raster(pigimg,x = 0.5,y = 0.5,width = 0.12,height = 0.12)#å äøŖå¾